# Two character vectors holding the same letters in a different order.
x <- c('a', 'b', 'c')
y <- c('c', 'b', 'a')
# logical operator: `==` compares the vectors position by position
x == y
## [1] FALSE TRUE FALSE
# search() lists the environments and attached packages on the search path
search()
## [1] ".GlobalEnv" "package:stats" "package:graphics"
## [4] "package:grDevices" "package:utils" "package:datasets"
## [7] "package:methods" "Autoloads" "package:base"
# value matching: `%in%` asks, for each element on the left, whether it
# appears anywhere in the vector on the right
x %in% y
## [1] TRUE TRUE TRUE
x %in% letters
## [1] TRUE TRUE TRUE
# Load the tidyverse first: it supplies the pipe (%>%) and as_tibble() used below.
library(tidyverse)

# Example: take the iris data, subset it, and then aggregate.
iris %>%
  subset(Sepal.Length < 5) %>%
  aggregate(. ~ Species, ., mean)

# Combine the two vectors into a data frame and view it as a tibble.
data <- data.frame(x, y)
as_tibble(data)

# [] subsets a data frame
## [, x] = column
new_data <- data[, 2]
as_tibble(new_data)

## [x, ] = row
new_data2 <- data[2, ]
# as_tibble() is used here because as.tibble() was deprecated in tibble 2.0.0
# (the signature and semantics have changed, see `?as_tibble`).
as_tibble(new_data2)
## Install packages from CRAN
install.packages("dplyr")
## Install a package from GitHub
install.packages("devtools")
# Template only -- substitute a real "DeveloperName/PackageName" before running:
# devtools::install_github("DeveloperName/PackageName")
devtools::install_github("RandiLGarcia/dyadr")

# Load packages
library(dplyr)
# One way to load multiple packages
pkgs <- c("psych", "tidyr", "tidyverse", "dplyr", "haven", "lm.beta", "car",
          "Hmisc", "skimr", "janitor", "labelled", "expss", "foreign")
# invisible() hides the list that lapply() would otherwise print
invisible(lapply(pkgs, library, character.only = TRUE))

# Confirm what is now attached
search()
## [1] ".GlobalEnv" "package:foreign" "package:expss"
## [4] "package:labelled" "package:janitor" "package:skimr"
## [7] "package:Hmisc" "package:Formula" "package:survival"
## [10] "package:lattice" "package:car" "package:carData"
## [13] "package:lm.beta" "package:haven" "package:psych"
## [16] "package:forcats" "package:stringr" "package:dplyr"
## [19] "package:purrr" "package:readr" "package:tidyr"
## [22] "package:tibble" "package:ggplot2" "package:tidyverse"
## [25] "package:stats" "package:graphics" "package:grDevices"
## [28] "package:utils" "package:datasets" "package:methods"
## [31] "Autoloads" "package:base"
# Open the help page for a function
?mean
?dplyr::mutate

## CSV
# Saved in the same folder
basic_df <- read.csv("depression_example_data.csv", stringsAsFactors = FALSE) # character strings will not be converted to factors
tibble_df <- read_csv("depression_example_data.csv") # reads as tibble
# Saved in different places
# Option 1 - Set working directory
# NOTE: setwd() changes the working directory for the rest of the session;
# Option 2 reads the same file without changing session state.
getwd()
setwd("/Users/kareenadelrosario/Desktop/Local R Code/NewFolder")
read_csv("csvFileName.csv")
# Option 2 - Include file path
read_csv("/Users/kareenadelrosario/Desktop/Local R Code/NewFolder/csvFileName.csv")
# Option 3 - Choose file
read.csv(file.choose(), header = TRUE)
read_sav(file.choose())
read_sas(file.choose())
# Option 4 - Use Menu
# File -> Import Dataset

# Example dataset used in the sections that follow
library(gapminder)
colnames(gapminder)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# skim() prints a summary of every column; the rendered output is kept below as comments.
skim(gapminder)
## | Name | gapminder |
## | Number of rows | 1704 |
## | Number of columns | 6 |
## | _______________________ | |
## | Column type frequency: | |
## | factor | 2 |
## | numeric | 4 |
## | ________________________ | |
## | Group variables | None |
## Variable type: factor
## | skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
## |---|---|---|---|---|---|
## | country | 0 | 1 | FALSE | 142 | Afg: 12, Alb: 12, Alg: 12, Ang: 12 |
## | continent | 0 | 1 | FALSE | 5 | Afr: 624, Asi: 396, Eur: 360, Ame: 300 |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | year | 0 | 1 | 1979.50 | 17.27 | 1952.00 | 1965.75 | 1979.50 | 1993.25 | 2007.0 | ▇▅▅▅▇ |
## | lifeExp | 0 | 1 | 59.47 | 12.92 | 23.60 | 48.20 | 60.71 | 70.85 | 82.6 | ▁▆▇▇▇ |
## | pop | 0 | 1 | 29601212.32 | 106157896.74 | 60011.00 | 2793664.00 | 7023595.50 | 19585221.75 | 1318683096.0 | ▇▁▁▁▁ |
## | gdpPercap | 0 | 1 | 7215.33 | 9857.45 | 241.17 | 1202.06 | 3531.85 | 9325.46 | 113523.1 | ▇▁▁▁▁ |
# glimpse() shows one line per column: its name, type, and first few values
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# View class of each variable (lapply applies class() to every column)
lapply(gapminder, class)
## $country
## [1] "factor"
##
## $continent
## [1] "factor"
##
## $year
## [1] "integer"
##
## $lifeExp
## [1] "numeric"
##
## $pop
## [1] "integer"
##
## $gdpPercap
## [1] "numeric"
# Factor = Nominal in SPSS
# new_df <-
# select(): keep only the named columns
gapminder %>%
  select(country, lifeExp)

# filter(): keep rows that satisfy every condition.
# Uncomment the assignment to save the result in the global environment.
# filtered_df <-
gapminder %>%
  filter(continent == "Africa", year > 1990) %>%
  head()

# mutate(): add a column computed from existing ones
gapminder %>%
  mutate(log.gdp = log(gdpPercap)) %>%
  head()

# Center gdpPercap on its overall mean
gapminder %>%
  mutate(diff.gdp = gdpPercap - mean(gdpPercap, na.rm = TRUE)) %>%
  head()

# group_by() + summarize(): one mean per year, most recent year first
gapminder %>%
  group_by(year) %>%
  dplyr::summarize(mean_gdp = mean(gdpPercap)) %>%
  arrange(desc(year))

# Read the empathy dataset used in the following sections
emp_df <- read_csv("/Users/kareenadelrosario/Desktop/Local R Code/Empathy_Background_randomized.csv")
colnames(emp_df)
## [1] "ID" "Dyad" "iri1" "iri2" "iri3" "iri4"
## [7] "iri5" "iri6" "iri7" "iri8" "iri9" "iri10"
## [13] "iri11" "iri12" "iri13" "iri14" "iri15" "iri16"
## [19] "iri17" "iri18" "iri19" "iri20" "iri21" "iri22"
## [25] "iri23" "iri24" "iri25" "iri26" "iri27" "iri28"
## [31] "Condition"
#### Option 1
# Convert one column at a time (shown for ID; Dyad and Condition work the same way)
emp_df$ID <- as.factor(emp_df$ID)
class(emp_df$ID)
## [1] "factor"
#### Option 2
## Useful when converting multiple variables. Note: [, X] = by column
# Column positions of the categorical variables: ID (1), Dyad (2), Condition (31).
# Stored as `factor_cols` rather than `names` so base::names() is not shadowed.
factor_cols <- c(1:2, 31)
emp_df[, factor_cols] <- lapply(emp_df[, factor_cols], factor)
# Check the class of the first six columns
head(lapply(emp_df, class))
## $ID
## [1] "factor"
##
## $Dyad
## [1] "factor"
##
## $iri1
## [1] "numeric"
##
## $iri2
## [1] "numeric"
##
## $iri3
## [1] "numeric"
##
## $iri4
## [1] "numeric"
#### Option 3
## Use dplyr to change class and apply value labels
# Label condition and make sure ID, Dyad, and Condition are classified as factors
emp_df <-
  emp_df %>%
  mutate(ID = factor(ID),
         Dyad = factor(Dyad),
         # Labels are assigned to the sorted levels, so 0 -> "Control" and
         # 1 -> "Sad", matching the coding described in this tutorial.
         # NOTE(review): confirm against the codebook that 1 is the sad condition.
         Condition = factor(Condition, labels = c("Control", "Sad")))

# Preview the converted columns
emp_df %>%
  select(ID, Dyad, Condition) %>%
  head()

# Delete variables
emp_df %>%
  select(-c(iri1)) %>%
  head()

# Exclude participants by dyad ID
emp_df <- emp_df[!(emp_df$Dyad %in% c(121, 124, 158, 168, 153)), ]
# Or by row number
emp_df %>%
  slice(-c(58:59, 117:118, 137:138, 107:108))

# Mean of iri1 within each dyad
emp_df %>%
  group_by(Dyad) %>%
  dplyr::summarize(mean_iri1 = mean(iri1))
#### Option 1
# Reverse-code each item into a new column suffixed with "r" (6 - original score)
reverse <- emp_df %>%
  mutate(iri3r = 6 - iri3,
         iri4r = 6 - iri4,
         iri7r = 6 - iri7,
         iri12r = 6 - iri12,
         iri13r = 6 - iri13,
         iri14r = 6 - iri14,
         iri15r = 6 - iri15,
         iri18r = 6 - iri18,
         iri19r = 6 - iri19)
#### Option 2
# rowwise() tells R to compute across the columns of each ROW, not down each column
# We can also reverse code in one step: write `6 - item` in place of the item
emp_df <- emp_df %>%
  rowwise() %>%
  mutate(emp_sum = sum(c(
    iri1, iri2, 6 - iri3, 6 - iri4, iri5, iri6, 6 - iri7,
    iri8, iri9, iri10, iri11, 6 - iri12, 6 - iri13, 6 - iri14,
    6 - iri15, iri16, iri17, 6 - iri18, 6 - iri19, iri20, iri21,
    iri22, iri23, iri24, iri25, iri26, iri27, iri28
  ))) %>%
  ungroup() # drop the row-wise grouping so later verbs operate on whole columns
# This dataset does not have a "partner" variable, which is super important for dyadic data.
## Derive a "partner" variable with mutate().
# ID.n and Dyad.n are numeric copies of the factor columns; partner is what
# remains of the ID after subtracting ten times the dyad number.
emp_df <- emp_df %>%
  mutate(
    ID.n = as.numeric(as.character(ID)),
    Dyad.n = as.numeric(as.character(Dyad)),
    partner = factor(ID.n - (Dyad.n * 10))
  )
# Preview variables
emp_df %>%
  select(Dyad, ID, partner)
## If we wanted to recode an existing variable, we could use this function: emp_df$Condition[emp_df$Condition == 1] <- 'Sad'
## We'll need to create a new condition variable to distinguish each individual condition (sad actors vs sad partners vs control dyad)
emp_df <- emp_df %>%
  mutate(
    p_cond = factor(case_when(
      (Condition %in% "Sad") & (partner %in% 1) ~ 2, # Sad Actor
      (Condition %in% "Sad") & (partner %in% 2) ~ 1, # Sad Partner
      TRUE ~ 3                                       # Control Dyad
    ))
  )
#### Sad Partner = 1
#### Sad Actor = 2
#### Control Dyad = 3
## Double-check new variable
emp_df %>%
  select(ID, Condition, partner, p_cond)
# Remove missing values from emp_df
# Keep complete cases, then keep only dyads in which both members remain
emp_df <- emp_df %>%
  drop_na(emp_sum, Condition) %>%
  group_by(Dyad) %>%
  filter(n() == 2) %>%
  ungroup()

# Wide -> long
df_wide <- read_csv("wide_df.csv")
#### Option 1
df_long <- pivot_longer(df_wide,
                        cols = !ID,          # pivot every column except ID (ID is left alone)
                        names_to = "Year",   # the wide column headers go here
                        values_to = "Value") # the cell values go here
head(df_long)
#### Option 2
df_wide %>%
  pivot_longer(
    cols = starts_with("199"), # could specify which columns to pivot
    names_to = "Year",
    values_to = "Value",
    values_drop_na = TRUE      # exclude NAs
  )

# Long -> wide
df_wide2 <- pivot_wider(df_long,
                        names_from = Year,
                        values_from = Value,
                        values_fill = 999) # fill NA with 999
head(df_wide2)
### Is it identical to our other wide df?
setequal(df_wide, df_wide2)
## New names:
## * `1990` -> ...1990
## * `1992` -> ...1992
## * `1994` -> ...1994
## * `1996` -> ...1996
## New names:
## * `1990` -> ...1990
## * `1992` -> ...1992
## * `1994` -> ...1994
## * `1996` -> ...1996
## [1] TRUE
# Compute the arithmetic mean of a numeric vector, print it, and return it.
#
# Args:
#   variable: numeric vector to average.
#   na.rm:    if TRUE, missing values are dropped before averaging. The default
#             (FALSE) keeps them, so any NA makes the result NA.
# Returns: the mean, invisibly, so the value can be stored or reused.
var_mean <- function(variable, na.rm = FALSE) {
  if (na.rm) {
    variable <- variable[!is.na(variable)]
  }
  # Stored as `variable_mean` so the local value does not shadow base::mean().
  variable_mean <- sum(variable) / length(variable)
  cat("Mean = ", variable_mean, "\n")
  invisible(variable_mean)
}
# Create data
test_data <- c(5,12,98,23,45,7,86,34)
# Now let's try it out: the function prints the mean of test_data
var_mean(test_data)
## Mean = 38.75
depression_data <- read.csv("depression_example_data.csv")

# Logistic regression: depression predicted from intervention and weeks
dep_model <- glm(depression ~ intervention + weeks,
                 data = depression_data,
                 family = binomial(),
                 na.action = na.omit) # casewise deletion
summary(dep_model)
##
## Call:
## glm(formula = depression ~ intervention + weeks, family = binomial(),
## data = depression_data, na.action = na.omit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6025 -1.0572 0.8107 0.8161 1.3095
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.234660 1.220563 -0.192 0.84754
## intervention 1.233532 0.414565 2.975 0.00293 **
## weeks -0.007835 0.175913 -0.045 0.96447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 154.08 on 112 degrees of freedom
## Residual deviance: 144.16 on 110 degrees of freedom
## AIC: 150.16
##
## Number of Fisher Scoring iterations: 4
# Print three pseudo-R^2 statistics for a fitted logistic regression.
#
# Args:
#   LogModel: a fitted model object exposing `deviance`, `null.deviance` and
#             `fitted.values` (for example the result of
#             glm(..., family = binomial())).
# Returns: a named numeric vector with the unrounded statistics, invisibly,
#          so they can be stored as well as read from the console.
logistic_r <- function(LogModel) {
  dev <- LogModel$deviance
  null_dev <- LogModel$null.deviance
  model_n <- length(LogModel$fitted.values) # number of fitted cases
  r_hl <- 1 - dev / null_dev
  r_cs <- 1 - exp(-(null_dev - dev) / model_n)
  r_n <- r_cs / (1 - exp(-(null_dev / model_n)))
  cat("Pseudo R^2 for logistic regression\n")
  cat("Hosmer and Lemeshow R^2 ", round(r_hl, 3), "\n")
  cat("Cox and Snell R^2 ", round(r_cs, 3), "\n")
  cat("Nagelkerke R^2 ", round(r_n, 3), "\n")
  invisible(c(hosmer_lemeshow = r_hl, cox_snell = r_cs, nagelkerke = r_n))
}
logistic_r(dep_model)
## Pseudo R^2 for logistic regression
## Hosmer and Lemeshow R^2 0.064
## Cox and Snell R^2 0.084
## Nagelkerke R^2 0.113
# Import data, shorten the variable names, and lower-case every column name
df_shape <- read_spss("miis_data_dict_convertedvariables_updated.sav") %>%
  dplyr::rename(dyad_id = Dyad.ID, condition = dyad_cond_ec, role = role_ec,
                share = tick_share,
                keep = tick_keep,
                pshare = tick_part_share,
                pkeep = tick_part_keep) %>%
  dplyr::rename_all(tolower)
## re-encoding from UTF-8

# Drop the columns that are not needed for the dyadic examples.
# NOTE(review): `as.numeric` is presumably a column literally named "as.numeric"
# in the .sav file -- verify against the data.
df_shape1 <- df_shape %>%
  select(-c(v1, as.numeric, dyad_cond:dyad_cond_w, role_d:neg_role,
            dic_dist_1:dic_part_2.0, meta_tick_part_keep, id))
colnames(df_shape1)
## [1] "dyad_id" "condition" "role" "keep" "share" "pshare"
## [7] "pkeep"
# Preview df and arrange by Dyad ID
df_shape1 %>%
  arrange(dyad_id)

# Codebook
# Role: -1 = partner, 1 = actor
# Condition: -1 = control, 1 = experimental
# keep: How many raffle tickets would you like to KEEP for yourself?
# share: How many raffle tickets would you like to GIVE to your partner?
# pshare: How many raffle tickets do you think your PARTNER would GIVE to you?
# pkeep: How many raffle tickets do you think your PARTNER would KEEP for themselves?
# ---- Individual -> dyad structure (one row per dyad) ----
shape_dyad <- df_shape1 %>%
  arrange(dyad_id) %>%                                    # sort by dyad ID (optional)
  gather(key, value,                                      # key = category, value = measurement (names are arbitrary)
         -dyad_id,                                        # columns marked with (-) are not gathered; they repeat
         -condition,
         -role) %>%
  mutate(role = ifelse(role == 1, "a", "p")) %>%          # role 1 -> "a" (actor), anything else -> "p" (partner)
  unite(new_key, key, role, sep = "_", remove = TRUE) %>% # append the role to each key, e.g. keep_a
  spread(new_key, value)                                  # one column per key_role combination
shape_dyad

# ---- Individual -> pairwise structure ----

# Spread the rows of one dyad into a single wide view. Rows with role == 1 get
# the suffix `actor_suffix`, all other rows get `other_suffix`; every column
# except dyad_id and condition (so role too) becomes <column>_<suffix>.
dyad_member_view <- function(dyad_rows, actor_suffix, other_suffix) {
  dyad_rows %>%
    mutate(act.par = ifelse(role == 1, actor_suffix, other_suffix)) %>%
    gather(key, value,
           -dyad_id,
           -condition,
           -act.par) %>%
    unite(new_key, key, act.par) %>%
    spread(new_key, value)
}

# Convert individual-level data (one row per person, with dyad_id, condition
# and role columns) to pairwise data: per dyad, one view in which the actor is
# "self" (_s) and one in which the partner is "self".
individual_to_pairwise <- function(individual_df) {
  individual_df %>%
    split(.$dyad_id) %>%                            # create mini dfs by dyad id
    map_df(function(dyad_rows) {
      bind_rows(
        dyad_member_view(dyad_rows, "s", "o"),      # role 1 is self, the other member is other
        dyad_member_view(dyad_rows, "o", "s")       # role 1 is other, the other member is self
      )
    }) %>%
    mutate(partnum = ifelse(role_s == 1, 1, 2)) %>% # partnum: actor = 1, partner = 2
    select(dyad_id, partnum, condition, role_s, ends_with("_s"), ends_with("_o"))
}

pair_shape <- individual_to_pairwise(df_shape1)
pair_shape %>%
  select(dyad_id,
         partnum,
         condition,
         role_s,
         role_o,
         keep_s,
         share_s,
         pkeep_s,
         pshare_s,
         keep_o,
         share_o,
         pkeep_o,
         pshare_o)

# ---- Dyad -> pairwise structure ----
# First go back to the individual level, then reuse the same conversion.
dyad_pair_shape <- shape_dyad %>%
  gather(key, value, -dyad_id, -condition) %>%
  mutate(role = ifelse(str_detect(key, "_a$"), 1, -1), # suffix _a marks the actor
         key = str_replace(key, "_(a|p)$", "")) %>%    # strip the role suffix only
  spread(key, value) %>%
  individual_to_pairwise()
dyad_pair_shape

# Both routes should give the same pairwise data
setequal(pair_shape, dyad_pair_shape)
## [1] TRUE
library(ggplot2)
library(reshape)
library(plyr)

# Treat the number of cylinders as a category rather than a number
mtcars$cyl <- as.factor(mtcars$cyl)
# Data, aesthetics (x, y), point
ggplot(mtcars, aes(cyl, mpg)) +
  geom_point()

# Map disp to both the color and the size aesthetic
ggplot(mtcars, aes(wt, mpg, color = disp, size = disp)) +
  geom_point()

# As a general rule, if you want to set an aesthetic to a specific value, you
# specify it outside of aes(). For example, color = "blue" does not go inside
# aes(). If instead you want a variable to drive the aesthetic, you place it
# inside aes(). For example, to show gender as separate colors, use
# aes(color = gender).
examData <- read.delim("Exam Anxiety.dat", header = TRUE)

# Simple scatter
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter + geom_point() +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Simple scatter with smooth with CI
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter + geom_point() + geom_smooth() +
  labs(x = "Exam Anxiety", y = "Exam Performance %")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Simple scatter with regression line
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter + geom_point() +
  geom_smooth(method = "lm", colour = "Red", se = FALSE) +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Simple scatter with regression line + CI
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter + geom_point() +
  geom_smooth(method = "lm", colour = "Red") +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Simple scatter with regression line + coloured CI
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter + geom_point() +
  geom_smooth(method = "lm", colour = "Red", alpha = 0.1, fill = "Red") +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Grouped scatter with regression line + CI
scatter <- ggplot(examData, aes(Anxiety, Exam, colour = Gender))
scatter + geom_point() +
  geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) +
  labs(x = "Exam Anxiety", y = "Exam Performance %", colour = "Gender")

# Store the grouped plot so the styling examples below can build on it
scatter2 <- scatter + geom_point() +
  geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) +
  labs(x = "Exam Anxiety", y = "Exam Performance %", colour = "Gender")
# Scatter plot
scatter2 + scale_color_manual(values = c("blue", "green"))
# Change line color
# http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
scatter2 + scale_color_manual(values = c("deepskyblue1", "darkseagreen1"))
# Change panel color
scatter2 + theme(panel.background = element_rect(fill = 'white'))
# Change plot color
scatter2 + theme(plot.background = element_rect(fill = 'black', color = 'black'),
                 panel.background = element_rect(fill = 'black'),
                 axis.title = element_text(color = 'white'))

# Histogram data
festivalData <- read.delim("DownloadFestival(No Outlier).dat", header = TRUE)
festivalHistogram <- ggplot(festivalData, aes(day1))
festivalHistogram + geom_histogram(binwidth = 0.4) +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Frequency")

# install.packages("extrafont")
library(extrafont)
# Change color and font
festivalHistogram +
  geom_histogram(binwidth = 0.2, color = "black", fill = "lightskyblue2") +
  theme(text = element_text(size = 12, family = "Comic Sans MS")) +
  facet_wrap("gender")

# Density plot
festivalDensity <- ggplot(festivalData, aes(day1))
festivalDensity + geom_density() +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Density Estimate")
# Density by gender
festivalDensity + geom_density() + aes(fill = gender)
# Change opacity and labels
festivalDensity + geom_density(aes(fill = gender), alpha = 0.4) +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Density Estimate")

# Boxplots
festivalBoxplot2 <- ggplot(festivalData, aes(gender, day1))
festivalBoxplot2 + geom_boxplot() +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
# Fill by gender and overlay the raw points
festivalBoxplot2 + geom_boxplot(aes(fill = gender)) + geom_point() +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
# Jitter the points so overlapping values stay visible
festivalBoxplot2 + geom_boxplot(aes(fill = gender)) + geom_jitter(alpha = 0.2) +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")

# Replace the -1 / 1 codes with readable labels before plotting
df_shape1$condition[df_shape1$condition == -1] <- "Control"
df_shape1$condition[df_shape1$condition == 1] <- "Experimental"
df_shape1$role[df_shape1$role == -1] <- "Partner"
df_shape1$role[df_shape1$role == 1] <- "Actor"

# Base layer for the bar chart
bar <- ggplot(df_shape1, aes(condition, share))
bar2 <- bar +
  stat_summary(aes(condition, share, fill = role),
               fun = mean,                 # display the means
               geom = "bar",
               position = "dodge") +
  # Error bars have no fill aesthetic; grouping by role dodges them the same
  # way without the "Ignoring unknown aesthetics: fill" warning.
  stat_summary(aes(condition, share, group = role),
               fun.data = mean_cl_normal,  # 95% CI assuming normality (other option would be _boot)
               geom = "errorbar",
               position = position_dodge(width = 0.90),
               width = 0.2) +
  labs(x = "Condition", y = "Money Shared", fill = "Role") +
  scale_fill_manual(values = c("deepskyblue1", "slategray3"))
bar2

# Add significance brackets
library(ggsignif)
bar <- ggplot(df_shape1, aes(condition, share))
bar2 +
  scale_y_continuous(breaks = seq(0, 15, 1)) +  # specifies breaks (0-15 at every 1pt)
  coord_cartesian(ylim = c(0, 15)) +            # y-axis on 0-15 scale
  geom_signif(y_position = c(7.6, 8.5), xmin = c(0.8, 1.8), xmax = c(1.2, 2.2), # sig bars between roles
              annotations = c("NS", "**"), tip_length = 0, color = "#756F6F") +
  geom_signif(comparisons = list(c("Control", "Experimental")), map_signif_level = TRUE, # sig bars between condition
              annotations = "NS", y_position = 11, color = "#756F6F") +
  theme_classic()
# Simulate meaningful fake data
## Depression over time with/without treatment
# Draw one random value per row; a shorter vector would be recycled by ifelse()
# and repeat the same values down the column.
n_obs <- nrow(depression_data)

# Baseline scores: both branches use the same distribution (mean 20, sd 1)
fake_data3 <- sample(c(0, 1), size = n_obs, replace = TRUE)
fake_data3 <- as.data.frame(ifelse(fake_data3 == 1,
                                   yes = rnorm(n_obs, 20, 1),
                                   no = rnorm(n_obs, 20, 1)))
dep_data <- cbind(depression_data, fake_data3)
names(dep_data)[ncol(dep_data)] <- "Baseline" # the column just appended

# Six-week scores: a mixture of a high (mean 20) and a low (mean 12) distribution
fake_data <- sample(c(0, 1), size = n_obs, replace = TRUE)
fake_data <- as.data.frame(ifelse(fake_data == 1,
                                  yes = rnorm(n_obs, 20, 2),
                                  no = rnorm(n_obs, 12, 2)))
names(fake_data)[1] <- "Six_Weeks"
# Sort scores ascending and rows by descending intervention, so the
# intervention rows are paired with the lowest six-week scores
fake_data <- fake_data %>% arrange(Six_Weeks)
dep_data <- dep_data %>% arrange(desc(intervention))
dep_data <- cbind(dep_data, fake_data)
dep_data$intervention[dep_data$intervention == 0] <- "No Intervention"
dep_data$intervention[dep_data$intervention == 1] <- "Intervention"

# Reshape wide to long
dep_data$ID <- seq_len(nrow(dep_data))
dep_data1 <- dep_data %>%
  select(-c(depression, weeks)) %>%
  # melt() takes id.vars / measure.vars; `measured` is not one of its arguments
  melt(id.vars = c("ID", "intervention"),
       measure.vars = c("Baseline", "Six_Weeks")) %>%
  dplyr::rename(Time = variable, Depression_Level = value) %>%
  arrange(ID)

# Line plot of mean depression by time and group
line <- ggplot(dep_data1, aes(Time, Depression_Level, color = intervention))
line + stat_summary(fun = mean, geom = "line", aes(group = intervention))

# Add bootstrapped 95% CIs and labels
line +
  stat_summary(fun = mean, geom = "line", aes(group = intervention)) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  labs(x = "Time", y = "Depression", colour = "Intervention") +
  ylim(5, 30)

line +
  stat_summary(fun = mean, geom = "point", aes(shape = intervention), size = 4) +             # Shape of point by group
  stat_summary(fun = mean, geom = "line", aes(group = intervention, linetype = intervention)) + # Dashed or solid line by group
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +                     # 95% CI
  labs(x = "Time", y = "Mean Depression Score", colour = "Group", shape = "Group", linetype = "Group") +
  ylim(5, 30)                                                                                 # Labels and range of y-axis

library(RColorBrewer)
library(RColorBrewer) # brewer.pal() below
library(gapminder)
library(dplyr)

# The ten highest and ten lowest life expectancies in 2007
gm2007.1 <- gapminder %>%
  filter(year == 2007) %>%
  slice_max(lifeExp, n = 10)
gm2007.2 <- gapminder %>%
  filter(year == 2007) %>%
  slice_min(lifeExp, n = 10)
gm2007 <- rbind(gm2007.1, gm2007.2)

# Add a geom_segment() layer
ggplot(gm2007, aes(x = lifeExp, y = country, color = lifeExp)) +
  geom_point(size = 4) +
  geom_segment(aes(xend = 30, yend = country), size = 2)

# Set the color scale: keep the two end colors of the 5-class RdYlBu palette
palette <- brewer.pal(5, "RdYlBu")[-(2:4)]

# The plot labels this line "The global average", so average over every country
# in 2007 rather than over the 20 extreme countries held in gm2007.
global_mean <- mean(gapminder$lifeExp[gapminder$year == 2007])
# Start and end coordinates for the annotation text and its arrow
x_start <- global_mean + 3
y_start <- 13
x_end <- global_mean
y_end <- 13.5

# Add a title and caption
plt_country_vs_lifeExp <- ggplot(gm2007, aes(x = lifeExp, y = country, color = lifeExp)) +
  geom_point(size = 4) +
  geom_segment(aes(xend = 30, yend = country), size = 2) +
  geom_text(aes(label = round(lifeExp, 1)), color = "white", size = 1.5) +
  scale_x_continuous("", expand = c(0, 0), limits = c(30, 90), position = "top") +
  scale_color_gradientn(colors = palette) +
  labs(title = "Highest and lowest life expectancies, 2007", caption = "Source: gapminder")

# Apply the theme, then mark the global average with a dotted line and an arrow
plt_country_vs_lifeExp +
  theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text = element_text(color = "black"),
        axis.title = element_blank(),
        legend.position = "none") +
  geom_vline(xintercept = global_mean, color = "grey40", linetype = 3) +
  annotate("text", x = x_start, y = y_start, label = "The\nglobal\naverage",
           vjust = 1.1, size = 3, family = "Times", color = "grey40") +
  annotate("curve", x = x_start, y = y_start, xend = x_end, yend = y_end,
           arrow = arrow(length = unit(0.1, "cm"), type = "closed"), color = "grey40") +
  theme(text = element_text(family = "Times"))

# library() stops with an error if the package is missing; require() would
# only return FALSE and let the next plot fail later.
library(gganimate)
# Animated bubble chart: GDP per capita (log10 x-axis) against life expectancy,
# point size mapped to population, one panel per continent, one frame per year.
ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, colour = country)) +
geom_point(alpha = 0.7, show.legend = FALSE) +
# one manual colour per country, taken from `country_colors`
scale_colour_manual(values = country_colors) +
scale_size(range = c(2, 12)) +
scale_x_log10() +
facet_wrap(~continent) +
# Here comes the gganimate specific bits
# {frame_time} in the title is filled in with the year of the current frame
labs(title = 'Year: {frame_time}', x = 'GDP per capita', y = 'life expectancy') +
transition_time(year) +
ease_aes('linear')